In [2]:
# Squares of the integers 0 through 9.
[x**2 for x in range(0,10)]
Out[2]:
In [4]:
# Even integers taken from range(1, 20).
[x for x in range(1,20) if x%2==0 ]
Out[4]:
In [6]:
# Uppercase vowels of 'MATHEMATICS' in order of appearance (repeats are kept).
[x for x in 'MATHEMATICS' if x in ['A','E','I','O','U']]
Out[6]:
In [14]:
# Print every perfect square in range(1, 101): a value qualifies when its
# float square root equals the integer truncation of that root.
for n in range(1, 101):
    root = n ** 0.5
    if int(root) == root:
        print(n)
In [22]:
# Perfect squares in range(1, 101): keep i when its float square root equals the truncated root.
[i for i in range(1,101) if int(i**0.5)==i**0.5]
Out[22]:
In [69]:
import numpy as np
In [47]:
# matrix = [ range(0,5), range(5,10), range(10,15) ]
# print matrix
In [25]:
def eg1_for(matrix):
    """Flatten a sequence of rows into a single list using explicit nested loops.

    Args:
        matrix: iterable of iterables (rows).

    Returns:
        A new list holding every element of every row, in row order.
    """
    flattened = []
    for line in matrix:
        for item in line:
            flattened.append(item)
    return flattened
In [26]:
def eg1_lc(matrix):
    """Flatten a sequence of rows into a single list using one nested list comprehension.

    The clauses read in the same order as the equivalent nested for-loops:
    outer loop over rows first, inner loop over each row's elements second.
    """
    return [
        item
        for line in matrix
        for item in line
    ]
In [28]:
# Build a 3-row matrix (rows 0-4, 5-9, 10-14) and print it next to both flattening results.
matrix = [ range(0,5), range(5,10), range(10,15) ]
print "Original Matrix: " + str(matrix)
print "FOR-loop result: " + str(eg1_for(matrix))
print "LC result : " + str(eg1_lc(matrix))
In [29]:
# Benchmark the nested-loop flattening.
%timeit eg1_for(matrix)
In [30]:
# Benchmark the list-comprehension flattening.
%timeit eg1_lc(matrix)
In [31]:
def eg2_for(sentence):
    """Return `sentence` with the lowercase vowels a, e, i, o, u removed (for-loop version).

    The membership test is case-sensitive, so uppercase vowels are kept.
    """
    vowels = 'aeiou'
    kept = []
    for ch in sentence:
        if ch in vowels:
            continue  # drop lowercase vowels
        kept.append(ch)
    return ''.join(kept)

eg2_for('My name is Aarshay Jain!')
Out[31]:
In [32]:
def eg2_lc(sentence):
    """Return `sentence` with the lowercase vowels a, e, i, o, u removed (list-comprehension version).

    The membership test is case-sensitive, so uppercase vowels are kept.
    """
    vowels = 'aeiou'
    return ''.join([l for l in sentence if l not in vowels])

# Demonstrate the LC version defined in this cell (the call previously
# invoked eg2_for, so eg2_lc itself was never exercised here).
eg2_lc('My name is Aarshay Jain!')
Out[32]:
In [133]:
# Run both vowel-removal versions on the same sentence and print the results side by side.
sentence = 'My name is Aarshay Jain!'
print "FOR-loop result: " + eg2_for(sentence)
print "LC result : " + eg2_lc(sentence)
In [33]:
# Benchmark the for-loop vowel removal.
%timeit eg2_for('My name is Aarshay Jain!')
In [34]:
# Benchmark the list-comprehension vowel removal.
%timeit eg2_lc('My name is Aarshay Jain!')
In [35]:
# Parallel lists: capital[i] is the capital paired with country[i] by the examples below.
country = ['India', 'Pakistan', 'Nepal', 'Bhutan', 'China', 'Bangladesh']
capital = ['New Delhi', 'Islamabad','Kathmandu', 'Thimphu', 'Beijing', 'Dhaka']
In [36]:
def eg3_for(keys, values):
    """Build a dict pairing keys[i] with values[i] using an explicit index loop.

    Args:
        keys: indexable sequence of hashable keys.
        values: indexable sequence with at least len(keys) items.

    Returns:
        dict mapping each key to the value at the same position; for a
        repeated key the last occurrence wins.
    """
    mapping = {}
    for idx in range(len(keys)):
        key = keys[idx]
        mapping[key] = values[idx]
    return mapping
# Show the country -> capital dict built by the for-loop version.
eg3_for(country,capital)
Out[36]:
In [37]:
def eg3_lc(keys, values):
    """Build a dict pairing keys[i] with values[i] using a dict comprehension.

    For a repeated key the last occurrence wins; values beyond len(keys) are ignored.
    """
    positions = range(len(keys))
    return {keys[pos]: values[pos] for pos in positions}
# Show the country -> capital dict built by the dict-comprehension version.
eg3_lc(country,capital)
Out[37]:
In [146]:
# Re-declare the parallel lists and print the dicts produced by both versions.
country = ['India', 'Pakistan', 'Nepal', 'Bhutan', 'China', 'Bangladesh']
capital = ['New Delhi', 'Islamabad','Kathmandu', 'Thimphu', 'Beijing', 'Dhaka']
print "FOR-loop result: " + str(eg3_for(country, capital))
print "LC result : " + str(eg3_lc(country, capital))
In [38]:
# Benchmark the for-loop dict construction.
%timeit eg3_for(country,capital)
In [40]:
# Benchmark the dict-comprehension construction.
%timeit eg3_lc(country,capital)
In [16]:
#FOR:
def eg4_for(N):
    """Return all primes strictly below N, in ascending order (for-loop sieve).

    Args:
        N: exclusive upper bound.

    Returns:
        List of primes p with 2 <= p < N; empty when N <= 2.
    """
    if N <= 2:
        # No primes below 2; also avoids taking the square root of a negative N.
        return []
    non_primes = set()
    for i in range(2, int(N**0.5) + 1):
        # Start at 2*i, not i: i itself must not be marked as composite,
        # otherwise the small primes (2, 3, 5, 7, ...) are wrongly excluded.
        for j in range(2 * i, N, i):
            non_primes.add(j)
    primes = []
    for i in range(2, N):
        if i not in non_primes:
            primes.append(i)
    return primes
# Print the result for N=100, then benchmark the for-loop version.
print eg4_for(100)
%timeit eg4_for(100)
In [18]:
#LC:
def eg4_lc(N):
    """Return all primes strictly below N, in ascending order (comprehension sieve).

    Args:
        N: exclusive upper bound.

    Returns:
        List of primes p with 2 <= p < N; empty when N <= 2.
    """
    if N <= 2:
        # No primes below 2; also avoids taking the square root of a negative N.
        return []
    # Multiples start at 2*i so that i itself is not marked composite
    # (starting at i dropped 2, 3, 5, 7, ... from the result).
    non_primes = {j for i in range(2, int(N**0.5) + 1) for j in range(2 * i, N, i)}
    return [i for i in range(2, N) if i not in non_primes]
# Print the result for N=100, then benchmark the comprehension version.
print eg4_lc(100)
%timeit eg4_lc(100)
In [1]:
# mat1 has 2 rows of 5 values and mat2 has 5 rows of 2 values, so mat1 x mat2 is defined.
mat1 = [ range(0,5), range(5,10) ]
mat2 = [ range(0,2), range(2,4), range(4,6), range(6,8), range(8,10) ]
print mat1 , mat2
In [7]:
# NOTE(review): this reuses the name eg2_for from the earlier vowel-removal
# example, so running this cell shadows that definition.
def eg2_for(mat1, mat2):
    """Multiply two matrices given as lists of rows, using three nested loops.

    Args:
        mat1: left matrix; each row is indexed up to len(mat2) entries.
        mat2: right matrix; must have at least one row (its first row sets
            the number of output columns).

    Returns:
        The product as a list of len(mat1) rows, each with len(mat2[0]) entries.
    """
    n_rows = len(mat1)
    n_inner = len(mat2)  # also the number of columns of mat1
    n_cols = len(mat2[0])
    product = []
    for r in range(n_rows):
        out_row = []
        for c in range(n_cols):
            total = 0
            for k in range(n_inner):
                total += mat1[r][k] * mat2[k][c]
            out_row.append(total)
        product.append(out_row)
    return product
# Print the product of mat1 and mat2, then benchmark the for-loop version.
print eg2_for(mat1,mat2)
%timeit eg2_for(mat1,mat2)
In [10]:
# NOTE(review): this reuses the name eg2_lc from the earlier vowel-removal
# example, so running this cell shadows that definition.
def eg2_lc(mat1, mat2):
    """Multiply two matrices given as lists of rows, using nested list comprehensions.

    Args:
        mat1: left matrix; each row is indexed up to len(mat2) entries.
        mat2: right matrix; must have at least one row (its first row sets
            the number of output columns).

    Returns:
        The product as a list of len(mat1) rows, each with len(mat2[0])
        entries, the same shape the for-loop version produces.
    """
    mat1_row = len(mat1)
    mat2_row = len(mat2)  # also the number of columns of mat1
    mat2_col = len(mat2[0])
    # The inner comprehension builds one output row; the outer one collects the
    # rows. A single flat comprehension over (row, col) would lose the row
    # structure and return a 1-D list instead of a matrix.
    matm = [
        [sum([mat1[row][i] * mat2[i][col] for i in range(mat2_row)]) for col in range(mat2_col)]
        for row in range(mat1_row)
    ]
    return matm
# Print the product of mat1 and mat2, then benchmark the comprehension version.
print eg2_lc(mat1,mat2)
%timeit eg2_lc(mat1,mat2)
In [5]:
# Benchmark the for-loop matrix multiplication.
%timeit eg2_for(mat1,mat2)
In [6]:
# Benchmark the list-comprehension matrix multiplication.
%timeit eg2_lc(mat1,mat2)
In [13]:
def tri_for(N):
    """Collect triples (i, j, k) with 1 <= i < j < k < N where one value exceeds the sum of the other two.

    NOTE(review): these are the triples that violate the strict triangle
    inequality; if valid triangles were intended, the condition is inverted.
    Confirm the intent.
    """
    triples = []
    for a in range(1, N - 2):
        for b in range(a + 1, N - 1):
            for c in range(b + 1, N):
                if a + b < c or a + c < b or b + c < a:
                    triples.append((a, b, c))
    return triples
def tri_lc(N):
    """List-comprehension form of tri_for: triples (i, j, k) with 1 <= i < j < k < N where one value exceeds the sum of the other two.

    NOTE(review): the condition keeps triples that violate the strict triangle
    inequality; confirm that this is the intended selection.
    """
    return [
        (a, b, c)
        for a in range(1, N - 2)
        for b in range(a + 1, N - 1)
        for c in range(b + 1, N)
        if a + b < c or a + c < b or b + c < a
    ]
# [ (i,j,k) for i in range(1,N-2) for j in range(i+1,N-1) for k in range(j+1,N) ]
In [14]:
# Print the triples for N=10, then benchmark the for-loop version.
print tri_for(10)
%timeit tri_for(10)
In [15]:
# Print the triples for N=10, then benchmark the comprehension version.
print tri_lc(10)
%timeit tri_lc(10)
In [1]:
# Apply x*(x+1) to every element of arr with map and an inline lambda.
arr = range(10) #contains [0,1,...,9]
map(lambda x: x*(x+1), arr)
Out[1]:
Here we have used the Python temporary function lambda. This can be replaced with a standard Python function or a user-defined function declared earlier.
In [1]:
#Method 1: For-Loop
def square_for(arr):
    """Return a list with the square of every element of `arr` (for-loop version)."""
    squares = []
    for value in arr:
        squares.append(value ** 2)
    return squares
# Squares of 1..10 from the for-loop version.
print square_for(range(1,11))
In [2]:
#Method 2: Map Function
def square_map(arr):
    """Return the result of mapping a squaring function over `arr` (map version)."""
    def _square(value):
        return value ** 2

    return map(_square, arr)
# Squares of 1..10 from the map version.
print square_map(range(1,11))
In [3]:
#Method 3: List comprehension:
def square_lc(arr):
    """Return a list with the square of every element of `arr` (list-comprehension version)."""
    squares = [value ** 2 for value in arr]
    return squares
# Squares of 1..10 from the list-comprehension version.
print square_lc(range(1,11))
Though the three techniques produce the same result, we can see that LC is the most elegant and readable technique. You might argue that even the map function is not bad in this case. But map has its own limitations which are not evident in this example.
Let's include a catch here. What if we want the square of only even numbers in the list? The three functions would look like:
In [7]:
#Method 1: For-Loop
def square_even_for(arr):
    """Return the squares of the even elements of `arr`, in order (for-loop version)."""
    squares = []
    for value in arr:
        if value % 2 != 0:
            continue  # skip odd values
        squares.append(value ** 2)
    return squares
# Squares of the even numbers in 1..10 from the for-loop version.
print square_even_for(range(1,11))
In [10]:
#Method 2: Map Function
def square_even_map(arr):
    """Return the squares of the even elements of `arr` using map followed by filter."""
    def _square_or_none(value):
        # Odd values become None so the filter step below can discard them.
        return value ** 2 if value % 2 == 0 else None

    def _is_kept(item):
        # Compare against None explicitly so that a legitimate 0 is kept.
        return item is not None

    squared_or_none = map(_square_or_none, arr)
    return filter(_is_kept, squared_or_none)
# Squares of the even numbers in 1..10 from the map/filter version.
print square_even_map(range(1,11))
In [11]:
#Method 3: List comprehension:
def square_even_lc(arr):
    """Return the squares of the even elements of `arr`, in order (list-comprehension version)."""
    return [
        value ** 2
        for value in arr
        if value % 2 == 0
    ]
# Squares of the even numbers in 1..10 from the list-comprehension version.
print square_even_lc(range(1,11))
It is clearly evident that with the slight increase in complexity, both for and map routines became bulkier and less readable. However, the LC routine is still concise and required a minor modification.
Before going into more complex examples, let's try to appreciate another advantage of using LC - lower computational time!
Let us compare the time taken for each of the above functions to run. We'll be using the %timeit magic function of the IPython notebook to determine the runtime. Alternatively, you can use the time or timeit modules.
Now you will be able to appreciate the importance of writing each code fragment as a function. Also, we shall focus on the relative run times and not the absolute values, because the absolute values depend on the machine specs. FYI, I am using a Dell XPS 14Z system with the following specs: 2nd Gen i5 (2.5GHz) | 4GB RAM | 64-bit OS | Windows 7 Home Premium
Let's compare the time for first example:
In [5]:
# Benchmark the for-loop squaring.
%timeit square_for(range(1,11))
In [6]:
# Benchmark the map-based squaring.
%timeit square_map(range(1,11))
In [7]:
# Benchmark the list-comprehension squaring.
%timeit square_lc(range(1,11))
Here we can see that in this case LC is ~30% faster than for-loop and ~45% faster than map function.
Let's check for the second example:
In [14]:
# Benchmark the for-loop even-squares version.
%timeit square_even_for(range(1,11))
In [15]:
# Benchmark the map/filter even-squares version.
%timeit square_even_map(range(1,11))
In [16]:
# Benchmark the list-comprehension even-squares version.
%timeit square_even_lc(range(1,11))
In this case, LC is ~20% faster than for-loop and ~65% faster than map function.
Now this is something incredible. Not only is LC more elegant, it is also faster than its counterparts. Yes, even I want to get into advanced applications of LC. But hang on! I am not convinced. Why is LC faster? Will it be faster in all scenarios, or are these special cases? Let's try to find out!
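I would not doubt your intellectual skills at this point if you are still wondering why LC is faster. After all, each technique follows the same three steps: (1) iterate over the elements, (2) apply an operation to each element, and (3) store the result.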
Let's try to inspect each element one by one. Let's simply call a function that does nothing and check for iteration times:
In [17]:
#Method 1: For-loop:
def empty_for(arr):
    """Iterate over `arr` and do nothing with the elements; used to time bare iteration."""
    for _ in arr:
        pass
# Time bare iteration with a for-loop.
%timeit empty_for(range(1,11))
In [18]:
#Method 2: Map
def empty_map(arr):
    """Map a function that always returns None over `arr`; the result is discarded."""
    map(lambda _item: None, arr)
# Time a map call whose function does no work.
%timeit empty_map(range(1,11))
In [19]:
#Method 3: LC
def empty_lc(arr):
    """Build (and discard) a list of None, one per element of `arr`."""
    [None for _ in arr]
# Time a list comprehension whose expression does no work.
%timeit empty_lc(range(1,11))
Here we see that the for-loop is the fastest. This is because in a for-loop we need not return an element; we just move on to the next iteration using "pass". In both LC and map, returning an element is necessary. The codes here return None. But still, map takes more than twice the time. Intuitively, we can think that map involves a definite function call at each iteration, which can be the reason behind the extra time.
Now, let's perform a simple operation of multiplying each number by 2, without storing the result:
In [20]:
#Method 1: For-loop:
def x2_for(arr):
    """Double each element of `arr` in a for-loop and discard the results."""
    for value in arr:
        value * 2
# Time iterate-and-double with a for-loop (results discarded).
%timeit x2_for(range(1,11))
In [21]:
#Method 2: Map
def x2_map(arr):
    """Map a doubling lambda over `arr`; the result is discarded."""
    map(lambda value: value * 2, arr)
# Time iterate-and-double with map.
%timeit x2_map(range(1,11))
In [23]:
#Method 3: LC
def x2_lc(arr):
    """Build (and discard) a list holding each element of `arr` doubled."""
    [value * 2 for value in arr]
# Time iterate-and-double with a list comprehension.
%timeit x2_lc(range(1,11))
Here we see a similar trend as before. So up to the point of iterating and making slight modifications, the for-loop is the clear winner. LC is close to the for-loop, but again map takes around twice as much time. Note that here the difference in time will also depend on the complexity of the function being applied to each element.
Another intuition for the higher run time of map and LC is that in both cases it is compulsory to store the results, so we are actually performing all 3 steps for LC and map. So let's check the runtime of the for-loop with step 3 included:
In [30]:
def store_for(arr):
    """Return a list holding each element of `arr` doubled, built with a for-loop and append."""
    doubled = []
    for value in arr:
        doubled.append(value * 2)
    return doubled
# Time the for-loop once it also stores each result.
%timeit store_for(range(1,11))
This is interesting! So the runtime jumps to almost twice just because of storing the information. The reason is that we have to define an empty list and append the result to it in each iteration.
After all 3 steps, LC seems to be the clear winner. But are you 100% sure why? Not sure about you, but I am not convinced. My intuition says that map is probably slower because it has to make a function call at each step, whereas LC might just be calculating the value of the same expression for all elements.
We can quickly check this out. Let's make a function call in LC as well:
In [24]:
# NOTE(review): redefines x2_lc from the earlier cell; this variant routes
# every element through a function call.
def x2_lc(arr):
    """Build (and discard) a list of doubled elements, calling a helper function per element."""
    def double(value):
        return value * 2

    [double(item) for item in arr]
# Time the list comprehension when each element goes through a function call.
%timeit x2_lc(range(1,11))
Aha! So the guess was right. When we force LC to make function calls, it ends up being more expensive than the map function.
So I guess the bottom line is that LC is faster in cases where simple expressions are applied to each element. But if complex functions are required, map and LC would be nearly the same. We can choose the one which works best.
As promised, let's think of a slightly advanced application of LC:
In [34]:
def my_first_gen(n):
    """Generator yielding the integers 0, 1, ..., n-1 one at a time."""
    for value in range(n):
        yield value
In [35]:
# Calling the generator function returns a generator object; printing it shows the object, not its values.
print my_first_gen(10)
In [61]:
# Create a generator that will yield 0, 1, 2.
gen = my_first_gen(3)
In [36]:
# Advance the generator by one step and print the yielded value (Python 2 .next() method).
print gen.next()
In [40]:
def flow_of_info_gen(N):
    """Generator yielding 0..N-1 that prints a trace message at each stage of execution.

    The messages show that the body runs only up to the next `yield` each time
    the generator is advanced, and resumes from that point on the next advance.

    The prints use the parenthesised single-argument form, which produces the
    same output under the Python 2 print statement and also runs on Python 3.
    """
    print('function runs for first time')
    for i in range(N):
        print('execution before yielding value %d' % i)
        yield i
        print('execution after yielding value %d' % i)
    print('function runs for last time')
In [41]:
# First advance: runs the body up to the first yield and returns 0.
gen2 = flow_of_info_gen(3)
gen2.next()
Out[41]:
In [42]:
# Second advance: resumes after the first yield and returns 1.
gen2.next()
Out[42]:
In [43]:
# Third advance: resumes after the second yield and returns 2.
gen2.next()
Out[43]:
In [44]:
# Fourth advance on a 3-value generator: the body runs to its end and StopIteration is raised.
gen2.next()
In [65]:
# Consume the first four values (0..3) by hand; sum() then adds only the remaining values 4..9.
gen3 = my_first_gen(10)
gen3.next()
gen3.next()
gen3.next()
gen3.next()
sum(gen3)
Out[65]:
In [52]:
# Square brackets: the comprehension builds the whole list immediately.
[x for x in range(10)]
Out[52]:
In [67]:
# Parentheses: the same expression becomes a generator expression, evaluated lazily.
(x for x in range(10))
Out[67]:
In [74]:
# A generator expression passed directly as the sole argument needs no extra parentheses.
sum(x for x in range(10))
Out[74]:
In [11]:
def sum_list(N):
    """Return the sum of 0..N-1, first materialising all values in a list."""
    values = [x for x in range(N)]
    return sum(values)
In [12]:
def sum_gen(N):
    """Return the sum of 0..N-1, feeding sum() from a generator expression."""
    lazy_values = (x for x in range(N))
    return sum(lazy_values)
In [19]:
# Compare list vs generator summation for N = 1,000.
# The trailing comma on each print statement suppresses the newline after the label (Python 2).
N=1000
print 'Time for LC : ',
%timeit sum_list(N)
print '\nTime for Generator : ',
%timeit sum_gen(N)
In [20]:
# Compare list vs generator summation for N = 100,000.
N=100000 #100K
print 'Time for LC : ',
%timeit sum_list(N)
print '\nTime for Generator : ',
%timeit sum_gen(N)
In [21]:
# Compare list vs generator summation for N = 10,000,000.
N=10000000 #10Mn
print 'Time for LC : ',
%timeit sum_list(N)
print '\nTime for Generator : ',
%timeit sum_gen(N)
In [22]:
# Compare for N = 100,000,000; here the generator version is timed first.
# NOTE(review): sum_list builds a 100M-element list and may exhaust memory on small machines; confirm before a full re-run.
N=100000000 #100Mn
print '\nTime for Generator : ',
%timeit sum_gen(N)
print 'Time for LC : ',
%timeit sum_list(N)
In [2]:
# Read skills.csv (path relative to the working directory) into a DataFrame and print it.
import pandas as pd
data = pd.read_csv("skills.csv")
print data
In [3]:
# Split each ';'-separated string in the 'skills' column into a list, stored in a new 'skills_list' column.
data['skills_list'] = data['skills'].apply(lambda x: x.split(';'))
print data['skills_list']
In [4]:
# Start from an empty set.
skills_unq = set()
# Feed every entry of every row's list into the set through a generator expression; a set keeps only unique values, so duplicates are dropped automatically.
skills_unq.update( (sport for l in data['skills_list'] for sport in l) )
print skills_unq
In [5]:
# Convert the set to a list so the unique skills have a fixed position for this run.
# NOTE(review): set iteration order is arbitrary, so the resulting column order may differ between runs.
skills_unq = list(skills_unq)
# One row per record and one 0/1 entry per unique skill: 1 when the record's skills list contains it.
sport_matrix = [ [1 if skill in row else 0 for skill in skills_unq] for row in data['skills_list'] ]
sport_matrix
Out[5]:
In [17]:
# Append the indicator columns (one per unique skill) to the right of the original frame.
# NOTE(review): this reassigns `data`, so re-running the cell appends the same columns again.
data = pd.concat([data, pd.DataFrame(sport_matrix,columns=skills_unq)],axis=1)
print data
In [9]:
# Single-column frame holding the numbers 1..5, used as input for the polynomial features below.
data2 = pd.DataFrame([1,2,3,4,5], columns=['number'])
print data2
In [10]:
# Column names power_2 .. power_<deg> for the polynomial features.
deg = 6
cols = ['power_%d'%i for i in range(2,deg+1)]
print cols
In [11]:
# One row per value in data2['number'], holding that value raised to each power 2..deg.
power_matrix = [ [i**p for p in range(2,deg+1) ] for i in data2['number'] ]
power_matrix
Out[11]:
In [12]:
# Append the power columns to the right of the original frame.
# NOTE(review): this reassigns `data2`, so re-running the cell appends the same columns again.
data2 = pd.concat([data2, pd.DataFrame(power_matrix,columns=cols)],axis=1)
In [13]:
# Show the frame with the added power columns.
print data2
In [14]:
# Example column names used to demonstrate column selection with list comprehensions.
cols = ['a','b','c','d','a_transform','b_transform','c_transform','d_power2','d_power3','d_power4','d_power5','temp1','temp2']
# a, b, c, d are original variables; *_transform are transformations; *_power* are for polynomial regression; temp* are intermediates.
In [15]:
# Four selections: names ending in 'transform'; names containing 'power';
# names matching either condition; and every name except the temp columns.
col_set1 = [x for x in cols if x.endswith('transform')]
col_set2 = [x for x in cols if 'power' in x]
col_set3 = [x for x in cols if (x.endswith('transform')) | ('power' in x)]
col_set4 = [x for x in cols if x not in ['temp1','temp2']]
In [16]:
# Print each selection with its label.
print 'Set1: ', col_set1
print 'Set2: ', col_set2
print 'Set3: ', col_set3
print 'Set4: ', col_set4